Group 6: SIGMA mbha0014 Mayunk Bharadwaj pgre0007 Priscila Grecov

Presentation: TUE 4-6.00PM - order 6 - question Mitch

Our question:

  1. How does relative humidity in Melbourne change throughout the day and year? When is it most uncomfortable to walk around the city (humidity below 30% or above 60%)?

We only need the sensors that measure relative humidity, i.e. those with sensor_id = 5b and sensor_id = 5b.EPA-1h.

library(dplyr)

Attaching package: ‘dplyr’

The following objects are masked from ‘package:stats’:

    filter, lag

The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union
unique(sensor_readingsRH[["sensor_id"]])
[1] "5b.EPA-1h" "5b"       
unique(sensor_readingsRH[["month"]])
 [1] Nov Dec Jan Sep Feb Aug Mar Apr May Jul Jun Oct
Levels: Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec
# Count missing values per column. is.na() on a data frame keeps one logical
# column per variable, so no coercion of the whole table to a matrix is needed.
colSums(is.na(sensor_readingsRH))
        id    site_id  sensor_id      value local_time       type      units       date  timeStamp       hour      month    hourMin 
         0          0          0          4          0          0          0          0          0          0          0          0 
unique(sensor_readingsRH[["site_id"]])
[1] "arc1045" "arc1046" "arc1047" "arc1048" "arc1050"
unique(sensor_readingsRH[["date"]])
  [1] "2019-11-15" "2019-11-16" "2019-11-18" "2019-11-19" "2019-11-20" "2019-11-17" "2019-11-21" "2019-11-22" "2019-11-23" "2019-11-24"
 [11] "2019-11-25" "2019-11-26" "2019-11-27" "2019-11-28" "2019-11-29" "2019-11-30" "2019-12-01" "2019-12-03" "2019-12-02" "2019-12-04"
 [21] "2019-12-06" "2019-12-07" "2019-12-05" "2019-12-08" "2019-12-09" "2019-12-10" "2019-12-11" "2019-12-12" "2019-12-13" "2019-12-14"
 [31] "2019-12-15" "2019-12-16" "2019-12-17" "2019-12-19" "2019-12-20" "2019-12-18" "2019-12-22" "2019-12-21" "2019-12-23" "2019-12-24"
 [41] "2019-12-25" "2019-12-26" "2019-12-28" "2019-12-29" "2019-12-31" "2019-12-27" "2019-12-30" "2020-01-01" "2020-01-03" "2020-01-02"
 [51] "2020-01-04" "2020-01-05" "2020-01-06" "2020-09-09" "2020-01-07" "2020-01-08" "2020-01-09" "2020-01-10" "2020-01-11" "2020-01-12"
 [61] "2020-01-13" "2020-01-14" "2020-01-16" "2020-01-15" "2020-01-17" "2020-01-18" "2020-01-19" "2020-01-21" "2020-01-22" "2020-01-23"
 [71] "2020-01-24" "2020-01-20" "2020-01-25" "2020-01-26" "2020-01-28" "2020-01-27" "2020-01-29" "2020-01-31" "2020-01-30" "2020-02-02"
 [81] "2020-02-01" "2020-02-03" "2020-02-05" "2020-02-06" "2020-02-07" "2020-02-04" "2020-02-08" "2020-02-09" "2020-02-10" "2020-02-11"
 [91] "2020-02-12" "2020-02-13" "2020-08-18" "2020-02-14" "2020-02-16" "2020-02-17" "2020-02-18" "2020-02-19" "2020-02-20" "2020-02-15"
[101] "2020-02-21" "2020-02-22" "2020-02-23" "2020-02-24" "2020-02-26" "2020-02-25" "2020-09-25" "2020-02-27" "2020-02-29" "2020-03-01"
[111] "2020-02-28" "2020-03-02" "2020-03-03" "2020-03-04" "2020-03-05" "2020-03-06" "2020-03-07" "2020-03-08" "2020-03-09" "2020-03-10"
[121] "2020-08-15" "2020-03-11" "2020-03-13" "2020-03-14" "2020-03-12" "2020-03-15" "2020-03-16" "2020-03-17" "2020-03-18" "2020-03-19"
[131] "2020-03-20" "2020-03-22" "2020-08-08" "2020-03-23" "2020-03-24" "2020-03-25" "2020-03-21" "2020-03-26" "2020-03-28" "2020-03-27"
[141] "2020-03-29" "2020-03-30" "2020-03-31" "2020-04-01" "2020-04-02" "2020-04-03" "2020-04-04" "2020-04-05" "2020-04-06" "2020-04-07"
[151] "2020-04-08" "2020-04-10" "2020-04-09" "2020-04-11" "2020-04-12" "2020-04-13" "2020-04-14" "2020-04-15" "2020-04-16" "2020-04-17"
[161] "2020-04-18" "2020-04-19" "2020-04-20" "2020-04-21" "2020-04-23" "2020-04-22" "2020-04-24" "2020-04-26" "2020-04-25" "2020-04-27"
[171] "2020-04-28" "2020-04-29" "2020-04-30" "2020-05-01" "2020-05-02" "2020-05-04" "2020-05-05" "2020-05-03" "2020-05-07" "2020-05-10"
[181] "2020-05-08" "2020-05-06" "2020-05-13" "2020-05-14" "2020-05-11" "2020-05-09" "2020-05-15" "2020-05-16" "2020-05-17" "2020-05-12"
[191] "2020-05-18" "2020-05-19" "2020-05-20" "2020-05-21" "2020-05-22" "2020-05-23" "2020-07-27" "2020-05-24" "2020-05-25" "2020-05-26"
[201] "2020-05-27" "2020-05-28" "2020-05-29" "2020-05-30" "2020-05-31" "2020-06-01" "2020-06-02" "2020-06-03" "2020-06-04" "2020-06-05"
[211] "2020-06-06" "2020-06-07" "2020-06-08" "2020-06-09" "2020-06-10" "2020-06-11" "2020-06-12" "2020-06-13" "2020-06-14" "2020-06-15"
[221] "2020-06-16" "2020-06-17" "2020-06-18" "2020-06-19" "2020-06-20" "2020-06-21" "2020-06-22" "2020-06-23" "2020-06-24" "2020-06-25"
[231] "2020-07-31" "2020-07-03" "2020-06-28" "2020-08-25" "2020-07-01" "2020-07-13" "2020-08-22" "2020-07-10" "2020-07-15" "2020-07-22"
[241] "2020-07-19" "2020-07-18" "2020-07-25" "2020-07-28" "2020-08-06" "2020-06-30" "2020-07-09" "2020-07-12" "2020-07-16" "2020-07-04"
[251] "2020-08-03" "2020-07-08" "2020-08-09" "2020-08-12" "2020-07-30" "2020-07-05" "2020-07-06" "2020-07-07" "2020-07-02" "2020-06-26"
[261] "2020-06-29" "2020-06-27" "2020-09-05" "2020-07-11" "2020-10-19" "2020-09-01" "2020-08-28" "2020-10-01" "2020-08-02" "2020-07-24"
[271] "2020-09-27" "2020-09-13" "2020-08-14" "2020-09-26" "2020-09-18" "2020-09-22" "2020-07-21" "2020-10-10" "2020-10-06" "2020-10-15"
[281] "2020-08-23" "2020-09-14" "2020-08-26" "2020-07-20" "2020-10-29" "2020-10-24" "2020-07-26" "2020-08-29" "2020-09-10" "2020-07-14"
[291] "2020-10-02" "2020-08-19" "2020-08-17" "2020-08-16" "2020-07-23" "2020-08-30" "2020-07-17" "2020-08-05" "2020-07-29" "2020-10-11"
[301] "2020-08-07" "2020-10-20" "2020-08-04" "2020-08-01" "2020-08-13" "2020-09-02" "2020-08-20" "2020-08-10" "2020-08-11" "2020-09-19"
[311] "2020-09-06" "2020-09-23" "2020-08-27" "2020-10-28" "2020-08-21" "2020-10-25" "2020-09-08" "2020-09-04" "2020-09-17" "2020-08-24"
[321] "2020-10-05" "2020-08-31" "2020-09-21" "2020-09-03" "2020-10-07" "2020-09-15" "2020-09-11" "2020-09-07" "2020-10-14" "2020-10-23"
[331] "2020-10-16" "2020-09-28" "2020-10-18" "2020-10-03" "2020-09-20" "2020-09-24" "2020-09-12" "2020-10-12" "2020-10-30" "2020-09-16"
[341] "2020-10-21" "2020-10-08" "2020-10-09" "2020-09-29" "2020-10-04" "2020-10-27" "2020-10-17" "2020-10-13" "2020-10-26" "2020-09-30"
[351] "2020-10-22"

1ST) HOW RELATIVE HUMIDITY CHANGES THROUGH THE YEAR - EXPLORING THE DATA BY MONTH OVER THE YEAR

In the graph, the different sensors show the same pattern over the year, so we can combine all the sensors by taking their average or median.

Only three days in the whole year had a daily median at or below 30%: 21/11/2019, 20/12/2019, 30/01/2020.

# Monthly median relative humidity, one line per site.
sensor_readingsRH %>%
  filter(sensor_id == "5b.EPA-1h") %>%
  group_by(site_id, month) %>%
  # `value` has missing readings in this data set; skip them so a single NA
  # does not turn a whole month into NA.
  summarise(median_RH = median(value, na.rm = TRUE), .groups = "drop") %>%
  ggplot(aes(x = month, y = median_RH, group = site_id, color = site_id)) +
  geom_line() +
  theme_ipsum()

# Monthly mean (blue) and median (red) over all sites together.
sensor_readingsRH %>%
  filter(sensor_id == "5b.EPA-1h") %>%
  group_by(month) %>%
  summarise(
    mean_RH = mean(value, na.rm = TRUE),
    median_RH = median(value, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  ggplot(aes(x = month, group = 1)) +
  geom_line(aes(y = mean_RH), color = "blue") +
  geom_line(aes(y = median_RH), color = "red") +
  theme_ipsum()

# Months whose mean or median reaches 70.
# NOTE(review): the research question sets the upper comfort limit at 60%,
# while this filter uses 70 - confirm which threshold is intended.
sensor_readingsRH %>%
  filter(sensor_id == "5b.EPA-1h") %>%
  group_by(month) %>%
  summarise(
    mean_RH = mean(value, na.rm = TRUE),
    median_RH = median(value, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  filter(mean_RH >= 70 | median_RH >= 70)

The months from May to August are the worst months, with RH averages above 70%. November seems to be the month with the lowest RH average (but this dataset only contains the second half of November).

# Monthly mean and median relative humidity over all sites.
sensor_readingsRH %>%
  filter(sensor_id == "5b.EPA-1h") %>%
  group_by(month) %>%
  # `value` has missing readings in this data set; skip them when aggregating.
  summarise(
    mean_RH = mean(value, na.rm = TRUE),
    median_RH = median(value, na.rm = TRUE),
    .groups = "drop"
  )

2ND) HOW RELATIVE HUMIDITY CHANGES THROUGH THE DAY - EXPLORING THE DATA BY HOUR OVER THE DAY.

# Mean relative humidity by hour of day, one line per site.
sensor_readingsRH %>%
  filter(sensor_id == "5b.EPA-1h") %>%
  group_by(site_id, hour) %>%
  # `value` has missing readings in this data set; skip them when aggregating.
  summarise(mean_RH = mean(value, na.rm = TRUE), .groups = "drop") %>%
  ggplot(aes(x = hour, y = mean_RH, group = site_id, color = site_id)) +
  geom_line() +
  theme_ipsum() +
  scale_x_continuous(breaks = seq(0, 23, 1))

# Hourly mean (blue) and median (red) over all sites, with reference lines.
# NOTE(review): the research question sets the upper comfort limit at 60%,
# while the upper reference line is drawn at 70 - confirm which is intended.
sensor_readingsRH %>%
  filter(sensor_id == "5b.EPA-1h") %>%
  group_by(hour) %>%
  summarise(
    mean_RH = mean(value, na.rm = TRUE),
    median_RH = median(value, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  ggplot(aes(x = hour)) +
  geom_line(aes(y = mean_RH), color = "blue") +
  geom_line(aes(y = median_RH), color = "red") +
  # geom_hline() is the dedicated geom for constant reference lines.
  geom_hline(yintercept = 70, color = "black", linetype = "dashed") +
  geom_hline(yintercept = 30, color = "purple", linetype = "dashed") +
  theme_ipsum() +
  scale_x_continuous(breaks = seq(0, 23, 1))

library(plotly)
Registered S3 method overwritten by 'data.table':
  method           from
  print.data.table     
Registered S3 method overwritten by 'htmlwidgets':
  method           from         
  print.htmlwidget tools:rstudio

Attaching package: ‘plotly’

The following object is masked from ‘package:ggplot2’:

    last_plot

The following object is masked from ‘package:stats’:

    filter

The following object is masked from ‘package:graphics’:

    layout
# Hourly mean (blue) and median (red) relative humidity, one panel per month.
# NOTE(review): the research question sets the upper comfort limit at 60%,
# while the upper reference line is drawn at 70 - confirm which is intended.
facet1 <- sensor_readingsRH %>%
  filter(sensor_id == "5b.EPA-1h") %>%
  group_by(month, hour) %>%
  # `value` has missing readings in this data set; skip them when aggregating.
  summarise(
    mean_RH = mean(value, na.rm = TRUE),
    median_RH = median(value, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  ggplot(aes(x = hour)) +
  geom_line(aes(y = mean_RH), color = "blue") +
  geom_line(aes(y = median_RH), color = "red") +
  # geom_hline() is the dedicated geom for constant reference lines.
  geom_hline(yintercept = 70, color = "black", linetype = "dotted") +
  geom_hline(yintercept = 30, color = "purple", linetype = "dotted") +
  facet_wrap(vars(month)) +
  scale_x_continuous(breaks = seq(0, 23, 1)) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1, size = 7))

# Interactive version of the plot with the plotly mode bar hidden.
ggplotly(facet1) %>%
  config(displayModeBar = FALSE)
`group_by_()` is deprecated as of dplyr 0.7.0.
Please use `group_by()` instead.
See vignette('programming') for more help
This warning is displayed once every 8 hours.
Call `lifecycle::last_warnings()` to see where this warning was generated.
# Month/hour combinations whose mean or median is at or below 30.
sensor_readingsRH %>%
  filter(sensor_id == "5b.EPA-1h") %>%
  group_by(month, hour) %>%
  # `value` has missing readings in this data set; skip them when aggregating.
  summarise(
    mean_RH = mean(value, na.rm = TRUE),
    median_RH = median(value, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  filter(mean_RH <= 30 | median_RH <= 30)

# Monthly mean (blue) and median (red), one panel per hour of day.
# NOTE(review): the research question sets the upper comfort limit at 60%,
# while the upper reference line is drawn at 70 - confirm which is intended.
sensor_readingsRH %>%
  filter(sensor_id == "5b.EPA-1h") %>%
  group_by(hour, month) %>%
  summarise(
    mean_RH = mean(value, na.rm = TRUE),
    median_RH = median(value, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  ggplot(aes(x = month, group = 1)) +
  geom_line(aes(y = mean_RH), color = "blue") +
  geom_line(aes(y = median_RH), color = "red") +
  # geom_hline() is the dedicated geom for constant reference lines.
  geom_hline(yintercept = 70, color = "black", linetype = "dashed", size = 0.5) +
  geom_hline(yintercept = 30, color = "purple", linetype = "dashed", size = 0.5) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1, size = 7)) +
  facet_wrap(vars(hour))

# Mean relative humidity by time of day for the "5b" sensor readings.
sensor_readingsRH %>%
  filter(sensor_id == "5b") %>%
  filter(!is.na(value)) %>%
  # `hourMin2` is not a column of the data, so build it here from the "HH:MM"
  # text in `hourMin`: seconds since midnight, stored as a difftime so that
  # scale_x_time() can label the axis as clock times.
  mutate(
    hourMin2 = as.difftime(
      as.numeric(substr(hourMin, 1, 2)) * 3600 +
        as.numeric(substr(hourMin, 4, 5)) * 60,
      units = "secs"
    )
  ) %>%
  group_by(hourMin2) %>%
  summarise(mean_RH = mean(value), .groups = "drop") %>%
  ggplot(aes(x = hourMin2)) +
  geom_line(aes(y = mean_RH), color = "blue") +
  theme_ipsum() +
  scale_x_time()
---
title: "Ass4 - ETC5523 - Relative Humidity Melbourne"
output: html_notebook
---

Group 6: SIGMA
mbha0014	Mayunk Bharadwaj
pgre0007	Priscila Grecov

Presentation: TUE 4-6.00PM - order 6 - question Mitch

Our question:

4. How does relative humidity in Melbourne change throughout the **day** and **year**? **When** is it **most uncomfortable** to walk around the city (humidity **below 30%** or **above 60%**)?

```{r reading-datasets}
# Load the two CSV files used by the notebook from the working directory.
sensor_readings <- read.csv(file = "Microclimate_Sensor_Readings.csv")
sensor_local <- read.csv(file = "Microclimate_Sensor_Locations.csv")
```

We only need the sensors that measure relative humidity, i.e. those with sensor_id = 5b and sensor_id = 5b.EPA-1h.


```{r filtering-sensor}
# Show every sensor id that occurs in the raw readings.
unique(sensor_readings[["sensor_id"]])

library(dplyr)
# Keep only the readings from the two relative-humidity sensor ids.
sensor_readingsRH <- filter(
  sensor_readings,
  sensor_id %in% c("5b", "5b.EPA-1h")
)
```

```{r showing-newtable}
# Confirm which sensor ids remain after filtering.
unique(sensor_readingsRH[["sensor_id"]])
```

```{r}
# Derive calendar and time-of-day columns from the `local_time` text.

# Calendar date of each reading. The explicit format avoids relying on
# as.Date() guessing the layout of `local_time`.
sensor_readingsRH$date <- as.Date(
  sensor_readingsRH$local_time,
  format = "%Y/%m/%d"
)
# Parse the wall-clock time with a fixed time zone so the derived hour and
# minute do not depend on the time zone (or daylight-saving rules) of the
# machine that runs the notebook. "UTC" is only a neutral label here: the
# values are kept exactly as written in `local_time`.
sensor_readingsRH$timeStamp <- as.POSIXct(
  sensor_readingsRH$local_time,
  format = "%Y/%m/%d %I:%M:%S %p",
  tz = "UTC"
)
sensor_readingsRH$hour <- as.numeric(format(sensor_readingsRH$timeStamp, "%H"))
sensor_readingsRH$month <- as.numeric(format(sensor_readingsRH$date, "%m"))
sensor_readingsRH$hourMin <- format(sensor_readingsRH$timeStamp, "%H:%M")
# Quick look at the distinct values of the derived columns.
unique(sensor_readingsRH$hour)
unique(sensor_readingsRH$month)
unique(sensor_readingsRH$hourMin)

# Replace the month number by an ordered Jan..Dec factor so that plots and
# tables follow calendar order.
sensor_readingsRH$month <- factor(month.abb[sensor_readingsRH$month], levels = month.abb)
unique(sensor_readingsRH$month)

# NOTE: the lines below are disabled, so no `hourMin2` column is created here.
# library(chron)
# sensor_readingsRH$hourMin2 <- chron(times=sensor_readingsRH$hourMin, format = "h:m")
# sensor_readingsRH$hourMin2 <- format(strptime(sensor_readingsRH$hourMin2,"%H:%M:%S"),'%H:%M')
```

```{r}
# Count missing values per column. is.na() on a data frame keeps one logical
# column per variable, so no coercion of the whole table to a matrix is needed.
colSums(is.na(sensor_readingsRH))
```

```{r}
# List the readings whose `value` is missing.
filter(sensor_readingsRH, is.na(value))
```

```{r}
# Sites that contribute relative-humidity readings.
unique(sensor_readingsRH[["site_id"]])
```

```{r}
library(ggplot2)
library(ggridges)
library(tidyverse)
library(hrbrthemes)
library(viridis)

# Distinct calendar dates present in the data, in order of first appearance.
unique(sensor_readingsRH[["date"]])
```

1ST) HOW RELATIVE HUMIDITY CHANGES THROUGH THE YEAR - EXPLORING THE DATA BY MONTH OVER THE YEAR

```{r during-year}
# Daily mean relative humidity per site, with the 30% and 60% comfort limits.
sensor_readingsRH %>%
  filter(sensor_id == "5b.EPA-1h") %>%
  group_by(site_id, date) %>%
  # `value` has missing readings in this data set; skip them so a single NA
  # does not blank out a whole day.
  summarise(mean_RH = mean(value, na.rm = TRUE), .groups = "drop") %>%
  ggplot(aes(x = date, y = mean_RH, group = site_id, color = site_id)) +
  geom_line() +
  # geom_hline() is the dedicated geom for constant reference lines.
  geom_hline(yintercept = 60, color = "black", linetype = "dotted") +
  geom_hline(yintercept = 30, color = "black", linetype = "dashed") +
  theme_ipsum() +
  xlab("") +
  scale_x_date(date_labels = "%Y-%m-%d", date_breaks = "10 days") +
  theme(axis.text.x = element_text(angle = 60, hjust = 1, size = 8))
```
In the graph above, the different sensors show the same pattern over the year, so we can combine all the sensors by taking their average or median.

```{r}
# Daily mean relative humidity over all sites, with reference lines.
# NOTE(review): the research question sets the upper comfort limit at 60%,
# while the upper reference line is drawn at 70 - confirm which is intended.
sensor_readingsRH %>%
  filter(sensor_id == "5b.EPA-1h") %>%
  group_by(date) %>%
  # `value` has missing readings in this data set; skip them when aggregating.
  summarise(
    mean_RH = mean(value, na.rm = TRUE),
    median_RH = median(value, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  ggplot(aes(x = date)) +
  geom_line(aes(y = mean_RH), color = "blue") +
  #geom_line(aes(y=median_RH), color="red") +
  # geom_hline() is the dedicated geom for constant reference lines.
  geom_hline(yintercept = 70, color = "red", linetype = "dashed") +
  geom_hline(yintercept = 30, color = "red", linetype = "dashed") +
  theme_ipsum() +
  xlab("") +
  scale_x_date(date_labels = "%Y-%m-%d", date_breaks = "10 days") +
  theme(axis.text.x = element_text(angle = 60, hjust = 1, size = 8))
```
```{r}
# Daily median relative humidity over all sites, with reference lines.
# NOTE(review): the research question sets the upper comfort limit at 60%,
# while the upper reference line is drawn at 70 - confirm which is intended.
sensor_readingsRH %>%
  filter(sensor_id == "5b.EPA-1h") %>%
  group_by(date) %>%
  # The statistic is a median, so name the column (and with it the y axis)
  # accordingly. Missing readings in `value` are skipped.
  summarise(median_RH = median(value, na.rm = TRUE), .groups = "drop") %>%
  ggplot(aes(x = date, y = median_RH)) +
  geom_line() +
  # geom_hline() is the dedicated geom for constant reference lines.
  geom_hline(yintercept = 70, color = "red", linetype = "dashed") +
  geom_hline(yintercept = 30, color = "red", linetype = "dashed") +
  theme_ipsum() +
  xlab("") +
  scale_x_date(date_labels = "%Y-%m-%d", date_breaks = "10 days") +
  theme(axis.text.x = element_text(angle = 60, hjust = 1, size = 8))
```
```{r}
# Days whose median relative humidity is at or below 30.
sensor_readingsRH %>%
  filter(sensor_id == "5b.EPA-1h") %>%
  group_by(date) %>%
  # `value` has missing readings in this data set; skip them so a day with one
  # NA is still evaluated instead of silently dropping out of the filter.
  summarise(median_RH = median(value, na.rm = TRUE), .groups = "drop") %>%
  filter(median_RH <= 30)
```
Only three days in the whole year had a daily median at or below 30%: 21/11/2019, 20/12/2019, 30/01/2020.

```{r}
# Monthly median relative humidity, one line per site.
sensor_readingsRH %>%
  filter(sensor_id == "5b.EPA-1h") %>%
  group_by(site_id, month) %>%
  # `value` has missing readings in this data set; skip them so a single NA
  # does not turn a whole month into NA.
  summarise(median_RH = median(value, na.rm = TRUE), .groups = "drop") %>%
  ggplot(aes(x = month, y = median_RH, group = site_id, color = site_id)) +
  geom_line() +
  theme_ipsum()
```


```{r}
# Monthly mean (blue) and median (red) relative humidity over all sites.
sensor_readingsRH %>%
  filter(sensor_id == "5b.EPA-1h") %>%
  group_by(month) %>%
  # `value` has missing readings in this data set; skip them when aggregating.
  summarise(
    mean_RH = mean(value, na.rm = TRUE),
    median_RH = median(value, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  ggplot(aes(x = month, group = 1)) +
  geom_line(aes(y = mean_RH), color = "blue") +
  geom_line(aes(y = median_RH), color = "red") +
  theme_ipsum()
```
```{r}
# Months whose mean or median relative humidity reaches 70.
# NOTE(review): the research question sets the upper comfort limit at 60%,
# while this filter uses 70 - confirm which threshold is intended.
sensor_readingsRH %>%
  filter(sensor_id == "5b.EPA-1h") %>%
  group_by(month) %>%
  # `value` has missing readings in this data set; skip them when aggregating.
  summarise(
    mean_RH = mean(value, na.rm = TRUE),
    median_RH = median(value, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  filter(mean_RH >= 70 | median_RH >= 70)
```
The months from May to August are the worst months, with RH averages above 70%. November seems to be the month with the lowest RH average (but this dataset only contains the second half of November).

```{r}
# Monthly mean and median relative humidity over all sites.
sensor_readingsRH %>%
  filter(sensor_id == "5b.EPA-1h") %>%
  group_by(month) %>%
  # `value` has missing readings in this data set; skip them when aggregating.
  summarise(
    mean_RH = mean(value, na.rm = TRUE),
    median_RH = median(value, na.rm = TRUE),
    .groups = "drop"
  )
```
2ND) HOW RELATIVE HUMIDITY CHANGES THROUGH THE DAY - EXPLORING THE DATA BY HOUR OVER THE DAY.

```{r}
# Mean relative humidity by hour of day, one line per site.
sensor_readingsRH %>%
  filter(sensor_id == "5b.EPA-1h") %>%
  group_by(site_id, hour) %>%
  # `value` has missing readings in this data set; skip them when aggregating.
  summarise(mean_RH = mean(value, na.rm = TRUE), .groups = "drop") %>%
  ggplot(aes(x = hour, y = mean_RH, group = site_id, color = site_id)) +
  geom_line() +
  theme_ipsum() +
  scale_x_continuous(breaks = seq(0, 23, 1))
```
```{r}
# Hourly mean (blue) and median (red) over all sites, with reference lines.
# NOTE(review): the research question sets the upper comfort limit at 60%,
# while the upper reference line is drawn at 70 - confirm which is intended.
sensor_readingsRH %>%
  filter(sensor_id == "5b.EPA-1h") %>%
  group_by(hour) %>%
  # `value` has missing readings in this data set; skip them when aggregating.
  summarise(
    mean_RH = mean(value, na.rm = TRUE),
    median_RH = median(value, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  ggplot(aes(x = hour)) +
  geom_line(aes(y = mean_RH), color = "blue") +
  geom_line(aes(y = median_RH), color = "red") +
  # geom_hline() is the dedicated geom for constant reference lines.
  geom_hline(yintercept = 70, color = "black", linetype = "dashed") +
  geom_hline(yintercept = 30, color = "purple", linetype = "dashed") +
  theme_ipsum() +
  scale_x_continuous(breaks = seq(0, 23, 1))
```
```{r}
library(plotly)
# Hourly mean (blue) and median (red) relative humidity, one panel per month.
# NOTE(review): the research question sets the upper comfort limit at 60%,
# while the upper reference line is drawn at 70 - confirm which is intended.
facet1 <- sensor_readingsRH %>%
  filter(sensor_id == "5b.EPA-1h") %>%
  group_by(month, hour) %>%
  # `value` has missing readings in this data set; skip them when aggregating.
  summarise(
    mean_RH = mean(value, na.rm = TRUE),
    median_RH = median(value, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  ggplot(aes(x = hour)) +
  geom_line(aes(y = mean_RH), color = "blue") +
  geom_line(aes(y = median_RH), color = "red") +
  # geom_hline() is the dedicated geom for constant reference lines.
  geom_hline(yintercept = 70, color = "black", linetype = "dotted") +
  geom_hline(yintercept = 30, color = "purple", linetype = "dotted") +
  facet_wrap(vars(month)) +
  scale_x_continuous(breaks = seq(0, 23, 1)) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1, size = 7))

# Interactive version of the plot with the plotly mode bar hidden.
# Spell out FALSE: `F` is an ordinary variable that can be reassigned.
ggplotly(facet1) %>%
  config(displayModeBar = FALSE)

```

```{r}
# Month/hour combinations whose mean or median is at or below 30.
sensor_readingsRH %>%
  filter(sensor_id == "5b.EPA-1h") %>%
  group_by(month, hour) %>%
  # `value` has missing readings in this data set; skip them so an NA summary
  # does not silently drop a combination from the filter.
  summarise(
    mean_RH = mean(value, na.rm = TRUE),
    median_RH = median(value, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  filter(mean_RH <= 30 | median_RH <= 30)
```
```{r}
# Monthly mean (blue) and median (red), one panel per hour of day.
# NOTE(review): the research question sets the upper comfort limit at 60%,
# while the upper reference line is drawn at 70 - confirm which is intended.
sensor_readingsRH %>%
  filter(sensor_id == "5b.EPA-1h") %>%
  group_by(hour, month) %>%
  # `value` has missing readings in this data set; skip them when aggregating.
  summarise(
    mean_RH = mean(value, na.rm = TRUE),
    median_RH = median(value, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  ggplot(aes(x = month, group = 1)) +
  geom_line(aes(y = mean_RH), color = "blue") +
  geom_line(aes(y = median_RH), color = "red") +
  # geom_hline() is the dedicated geom for constant reference lines.
  geom_hline(yintercept = 70, color = "black", linetype = "dashed", size = 0.5) +
  geom_hline(yintercept = 30, color = "purple", linetype = "dashed", size = 0.5) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1, size = 7)) +
  facet_wrap(vars(hour))
```

```{r}
# Mean relative humidity by time of day for the "5b" sensor readings.
sensor_readingsRH %>%
  filter(sensor_id == "5b") %>%
  filter(!is.na(value)) %>%
  # `hourMin2` is not a column of the data (the code that would have created
  # it is commented out), so build it here from the "HH:MM" text in `hourMin`:
  # seconds since midnight, stored as a difftime so that scale_x_time() can
  # label the axis as clock times.
  mutate(
    hourMin2 = as.difftime(
      as.numeric(substr(hourMin, 1, 2)) * 3600 +
        as.numeric(substr(hourMin, 4, 5)) * 60,
      units = "secs"
    )
  ) %>%
  group_by(hourMin2) %>%
  summarise(mean_RH = mean(value), .groups = "drop") %>%
  ggplot(aes(x = hourMin2)) +
  geom_line(aes(y = mean_RH), color = "blue") +
  theme_ipsum() +
  scale_x_time()
```












